
************** Spatial Regressions ******************
* spatial transformation completed previously in spgen_4_* 

************************************
* Contents
		* Radiant based: section 2 horizontal, 6 vertical, counter-clockwise.
		* 4950 cells based on k_bins of width 3 kWh 
		* Outcomes: sales and count, each in level, log+1, and ihs transformation
		* standard errors clustered by cell   
************************************

************************************
* Variables
		* cm: country-month items (second level identifier)
		* cell: 1-50 are bunchers at window = 2. (primary identifier)
		* a_bin/e_bin: individual attributes, unstandardized 
		* csd  / ccd: sales / product count in cell. 
		
		
* --- Session housekeeping ---
capture log close		// close any log left open by an earlier run
clear all
set more off
* set maxvar 30000, permanently	// required to load matrix
* User-written packages used below:
*	ssc install reghdfe
*	ssc install ftools

* --- Directories ---
* Project root on the server; becomes the working directory.
global server "R:\WSV2\TBu_AKe\Spatial_NEW"
cd "${server}"

* Output folder for this run (capture: mkdir errors if the folder already exists).
capture mkdir "Wmat_Grid_XVal"
global store "${server}\Wmat_Grid_XVal"

* Desktop data folder (sub-directories of it are used when loading data).
global desktop "C:\Users\hy65byfe\Desktop\smerge_0712"
* cd "${desktop}"

* --- Main log, written into the output folder ---
cd "${store}"
log using "spatial_6_regs_s_grid_1702_xval", replace
 

			***************************
			* run regression: OLS, fe *
			***************************		

* Load the monthly grid data from the Grid_4950 sub-folder of the desktop
* data directory (global desktop is defined in the directory set-up above).
cd "${desktop}\Grid_4950"
*cd "R:\WSV2\TBu_AKe\Spatial_NEW\SCB_4950"

* -use- takes the option -clear- (discard data in memory); -replace- is not
* an option of -use- and stops the do-file with "option replace not allowed".
use spatial_regs_grid_0111_monthly, clear


*preserve
*spmatrix dir
* Restrict to 2014. Do not change this: the spatial matrices built in the
* previous step are otherwise wrong.
keep if year == 2014
	
	
			***************************
			***** Single Matrix *****
			***************************	
		

** Add log(1+x) and inverse-hyperbolic-sine (ihs) versions of the untransformed data **

* Spatial variables: West (W) and East (O) series for each direction q = 5, 10, ..., 40.
local spatial_vars
forvalues ang = 5(5)40 {
	local spatial_vars `spatial_vars' W`ang'_L3_sd O`ang'_L3_sd W`ang'_L3_cd O`ang'_L3_cd
}

* Outcomes and polar cases first, then the spatial variables, in one pass.
foreach v of varlist csd L3_sd R_L3_sd H_L3_sd ccd L3_cd R_L3_cd H_L3_cd `spatial_vars' {
	gen log_`v' = log(1 + `v')
	// ihs(x) = log(x + (x^2 + 1)^0.5), built in two steps
	gen ihs_`v' = `v' + (`v'^2 + 1)^0.5
	quietly replace ihs_`v' = log(ihs_`v')
}

			**********************************************
			***** 	Cross-validation over countries  *****
			**********************************************
			
* Recode country 8 as 7 so the country loops can run over ccode = 1..7
* (original note: Serbia was 7, SVN was 8).
* NOTE(review): presumably no observation still carries ccode == 7 at this
* point, otherwise two countries would be merged -- confirm against the data.
replace ccode = 7 if ccode == 8

* Empty holders for the pooled out-of-sample errors: one per direction
* (5, 10, ..., 40) plus the rook (R) and horizontal (H) polar cases.
foreach tag in 5 10 15 20 25 30 35 40 R H {
	gen v_e_`tag' = .
}


			**********************************************
			***** 			Regressions : Log		******
			**********************************************

		** WEST **	
* Leave-one-country-out cross-validation, log outcome, West (W) spatial variable.
* Each model is fitted without country c; the errors for country c are then
* copied into the holder v_e_<direction>.
local cvsample "cell <= 50 & year == 2014"
forvalues c = 1/7 {	// held-out country
	forvalues ang = 5(5)40 {	// direction
		reghdfe log_csd log_L3_sd log_W`ang'_L3_sd if `cvsample' & ccode != `c', ///
			absorb(cell ccode month) vce(cluster cell)
		estimates store log_W`ang'
		// note: the residuals option in reghdfe is faster, but does not predict out-of-sample
		predict log_W`ang'_`c', xb
		gen err_W`ang'_`c' = log_csd - log_W`ang'_`c' if `cvsample'
		replace v_e_`ang' = err_W`ang'_`c' if ccode == `c'
		/* check predictions
		sum err_W`ang'_`c' if ccode == 1 // 600
		sum err_W`ang'_`c' // 4,200
		*/
	}
}

	** Calculate rmse **
	
* RMSE of the pooled out-of-sample errors, West / log, one value per direction q.
* The denominator is the number of observations that actually hold an error
* (the original divided by a hard-coded 4200, which understates the RMSE
* whenever some predictions are missing). After use, the holder v_e_q is
* emptied again so the next block can refill it.
forvalues q = 5(5)40 {
	quietly count if !missing(v_e_`q')
	local n_oos = r(N)
	if `n_oos' != 4200 {
		display as text "note: W`q' (log) has `n_oos' out-of-sample errors, not 4200"
	}
	egen double aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')
	gen rmse_W`q'_log = sqrt(aux_rmse_`q'/`n_oos')

	replace v_e_`q' = .
}

quietly drop aux_rmse_* err_W*


		** EAST **	
* Leave-one-country-out cross-validation, log outcome, East (O) spatial variable.
* Each model is fitted without country c; the errors for country c are then
* copied into the holder v_e_<direction>.
local cvsample "cell <= 50 & year == 2014"
forvalues c = 1/7 {	// held-out country
	forvalues ang = 5(5)40 {	// direction
		reghdfe log_csd log_L3_sd log_O`ang'_L3_sd if `cvsample' & ccode != `c', ///
			absorb(cell ccode month) vce(cluster cell)
		estimates store log_O`ang'
		// note: the residuals option in reghdfe is faster, but does not predict out-of-sample
		predict log_O`ang'_`c', xb
		gen err_O`ang'_`c' = log_csd - log_O`ang'_`c' if `cvsample'
		replace v_e_`ang' = err_O`ang'_`c' if ccode == `c'
		/* check predictions
		sum err_O`ang'_`c' if ccode == 1 // 600
		sum err_O`ang'_`c' // 4,200
		*/
	}
}

	** Check error structure ** 
* sum err*	// confirms convergence to vertical after 30. 
	

* RMSE of the pooled out-of-sample errors, East / log, one value per direction q.
* The denominator is the number of observations that actually hold an error
* (the original divided by a hard-coded 4200, which understates the RMSE
* whenever some predictions are missing). After use, the holder v_e_q is
* emptied again so the next block can refill it.
forvalues q = 5(5)40 {
	quietly count if !missing(v_e_`q')
	local n_oos = r(N)
	if `n_oos' != 4200 {
		display as text "note: O`q' (log) has `n_oos' out-of-sample errors, not 4200"
	}
	egen double aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')
	gen rmse_O`q'_log = sqrt(aux_rmse_`q'/`n_oos')

	replace v_e_`q' = .
}

quietly drop aux_rmse_* err_O*

	************************ Compare rmse *****************************
sum rmse_W*

sum rmse_O*
		
		
	***************** Polar Cases ****************************

		
* Leave-one-country-out cross-validation, log outcome, polar cases:
* rook (R) and horizontal (H). For every held-out country c the rook model is
* run first, then the horizontal one; the errors for country c go into v_e_R / v_e_H.
local cvsample "cell <= 50 & year == 2014"
forvalues c = 1/7 {	// held-out country
	foreach p in R H {	// R = rook, H = horizontal
		reghdfe log_csd log_L3_sd log_`p'_L3_sd if `cvsample' & ccode != `c', ///
			absorb(cell ccode month) vce(cluster cell)
		estimates store log_`p'
		// note: the residuals option in reghdfe is faster, but does not predict out-of-sample
		predict log_`p'_`c', xb
		gen err_`p'_`c' = log_csd - log_`p'_`c' if `cvsample'
		replace v_e_`p' = err_`p'_`c' if ccode == `c'
	}
}

	********************** Calculate rmse *****************************
	* RMSE of the pooled out-of-sample errors for the polar cases (log):
	* R = rook, H = horizontal. The denominator is the number of observations
	* that actually hold an error (the original divided by a hard-coded 4200,
	* which understates the RMSE whenever some predictions are missing).
	* Each holder is emptied after use so a later block can refill it.
	foreach p in R H {
		quietly count if !missing(v_e_`p')
		local n_oos = r(N)
		if `n_oos' != 4200 {
			display as text "note: `p' (log) has `n_oos' out-of-sample errors, not 4200"
		}
		egen double aux_rmse_`p' = total(v_e_`p'^2) if !missing(v_e_`p')
		gen rmse_`p'_log = sqrt(aux_rmse_`p'/`n_oos')

		quietly drop aux_rmse_* err_`p'*
		replace v_e_`p' = . // reset
	}

	************************ compare rmse *****************************
sum rmse_R*  rmse_H*

sum rmse_W*_log

sum rmse_O*_log
	
	
			**********************************************
			***** 			Regressions : IHS		******
			**********************************************

		** WEST **	
* Leave-one-country-out cross-validation, ihs outcome, West (W) spatial variable.
* Each model is fitted without country c; the errors for country c are then
* copied into the holder v_e_<direction>.
local cvsample "cell <= 50 & year == 2014"
forvalues c = 1/7 {	// held-out country
	forvalues ang = 5(5)40 {	// direction
		reghdfe ihs_csd ihs_L3_sd ihs_W`ang'_L3_sd if `cvsample' & ccode != `c', ///
			absorb(cell ccode month) vce(cluster cell)
		estimates store ihs_W`ang'
		// note: the residuals option in reghdfe is faster, but does not predict out-of-sample
		predict ihs_W`ang'_`c', xb
		gen err_W`ang'_`c' = ihs_csd - ihs_W`ang'_`c' if `cvsample'
		replace v_e_`ang' = err_W`ang'_`c' if ccode == `c'
		/* check predictions
		sum err_W`ang'_`c' if ccode == 1 // 600
		sum err_W`ang'_`c' // 4,200
		*/
	}
}

	** Calculate rmse **
	
* RMSE of the pooled out-of-sample errors, West / ihs, one value per direction q.
* The denominator is the number of observations that actually hold an error
* (the original divided by a hard-coded 4200, which understates the RMSE
* whenever some predictions are missing). After use, the holder v_e_q is
* emptied again so the next block can refill it.
forvalues q = 5(5)40 {
	quietly count if !missing(v_e_`q')
	local n_oos = r(N)
	if `n_oos' != 4200 {
		display as text "note: W`q' (ihs) has `n_oos' out-of-sample errors, not 4200"
	}
	egen double aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')
	gen rmse_W`q'_ihs = sqrt(aux_rmse_`q'/`n_oos')

	replace v_e_`q' = .
}

quietly drop aux_rmse_* err_W*


		** EAST **	
* Leave-one-country-out cross-validation, ihs outcome, East (O) spatial variable.
* Each model is fitted without country c; the errors for country c are then
* copied into the holder v_e_<direction>.
local cvsample "cell <= 50 & year == 2014"
forvalues c = 1/7 {	// held-out country
	forvalues ang = 5(5)40 {	// direction
		reghdfe ihs_csd ihs_L3_sd ihs_O`ang'_L3_sd if `cvsample' & ccode != `c', ///
			absorb(cell ccode month) vce(cluster cell)
		estimates store ihs_O`ang'
		// note: the residuals option in reghdfe is faster, but does not predict out-of-sample
		predict ihs_O`ang'_`c', xb
		gen err_O`ang'_`c' = ihs_csd - ihs_O`ang'_`c' if `cvsample'
		replace v_e_`ang' = err_O`ang'_`c' if ccode == `c'
		/* check predictions
		sum err_O`ang'_`c' if ccode == 1 // 600
		sum err_O`ang'_`c' // 4,200
		*/
	}
}

	** Check error structure ** 
* sum err*	// confirms convergence to vertical after 30. 
	

* RMSE of the pooled out-of-sample errors, East / ihs, one value per direction q.
* The denominator is the number of observations that actually hold an error
* (the original divided by a hard-coded 4200, which understates the RMSE
* whenever some predictions are missing). After use, the holder v_e_q is
* emptied again.
forvalues q = 5(5)40 {
	quietly count if !missing(v_e_`q')
	local n_oos = r(N)
	if `n_oos' != 4200 {
		display as text "note: O`q' (ihs) has `n_oos' out-of-sample errors, not 4200"
	}
	egen double aux_rmse_`q' = total(v_e_`q'^2) if !missing(v_e_`q')
	gen rmse_O`q'_ihs = sqrt(aux_rmse_`q'/`n_oos')

	replace v_e_`q' = .
}

quietly drop aux_rmse_* err_O*

	************************ Compare rmse *****************************
sum rmse_W*

sum rmse_O*
		
		
	***************** Polar Cases ****************************

		** Rook (R) and Horizontal (H) **
* Leave-one-country-out cross-validation, ihs outcome, polar cases.
* For every held-out country c the rook model is run first, then the
* horizontal one; the errors for country c go into v_e_R / v_e_H.
local cvsample "cell <= 50 & year == 2014"
forvalues c = 1/7 {	// held-out country
	foreach p in R H {	// R = rook, H = horizontal
		reghdfe ihs_csd ihs_L3_sd ihs_`p'_L3_sd if `cvsample' & ccode != `c', ///
			absorb(cell ccode month) vce(cluster cell)
		estimates store ihs_`p'
		// note: the residuals option in reghdfe is faster, but does not predict out-of-sample
		predict ihs_`p'_`c', xb
		gen err_`p'_`c' = ihs_csd - ihs_`p'_`c' if `cvsample'
		replace v_e_`p' = err_`p'_`c' if ccode == `c'
	}
}

	********************** Calculate rmse *****************************
	* RMSE of the pooled out-of-sample errors for the polar cases (ihs):
	* R = rook, H = horizontal. The denominator is the number of observations
	* that actually hold an error (the original divided by a hard-coded 4200,
	* which understates the RMSE whenever some predictions are missing).
	foreach p in R H {
		quietly count if !missing(v_e_`p')
		local n_oos = r(N)
		if `n_oos' != 4200 {
			display as text "note: `p' (ihs) has `n_oos' out-of-sample errors, not 4200"
		}
		egen double aux_rmse_`p' = total(v_e_`p'^2) if !missing(v_e_`p')
		gen rmse_`p'_ihs = sqrt(aux_rmse_`p'/`n_oos')

		quietly drop aux_rmse_* err_`p'*
	}


	************************ Compare rmse *****************************
sum rmse_R*  rmse_H*

sum rmse_W*_log

sum rmse_O*_log

sum rmse_W*_ihs

sum rmse_O*_ihs
	
capture log close

* Separate log in the output folder that holds only the RMSE comparison tables.
cd "${store}"
log using "results_summary_s_grid_1702_xval", replace


		*******************************
		**** Results summary **********
		*******************************

* Same three tables for each transformation, logs first and then ihs.
foreach tf in log ihs {
	summarize rmse_R*_`tf' rmse_H*_`tf'	// R = vertical, H = horizontal
	summarize rmse_W*_`tf'	// West
	summarize rmse_O*_`tf'	// East
}

log close

clear

